VIBRANT Trial - Primary Analyses

Author

Laura Symul, Laura Vermeren, Susan Holmes

Code
library(tidyverse)
library(magrittr)
library(gt)
library(patchwork)
library(SummarizedExperiment)
library(tidySummarizedExperiment)
library(brms)
library(gtsummary)
library(labelled)
library(scales)
# library(gridExtra)
library(ggpubr)
# library(mia) # BiocManager::install("mia")

theme_set(theme_light())
tmp <- fs::dir_map("R", source)
Code
#data_source <- "real"

# Introduction

This document presents the primary analyses of the VIBRANT trial.

Currently, this runs on blinded data.

Loading the data

TODO: change path

Code
mae <- readRDS("/Users/laurasymul/OneDrive - UCL/Academia/Research/VIBRANT data UCLouvain/actual data/03 QCed MAEs/mae_full_20250527.rds")
mae
A MultiAssayExperiment object of 6 listed
 experiments with user-defined names and respective classes.
 Containing an ExperimentList class object of length 6:
 [1] visit_and_sample_info: SummarizedExperiment with 7 rows and 5209 columns
 [2] mg: SummarizedExperiment with 778 rows and 966 columns
 [3] qPCR: SummarizedExperiment with 16 rows and 1014 columns
 [4] amplicon: SummarizedExperiment with 817 rows and 4113 columns
 [5] luminex: SummarizedExperiment with 42 rows and 850 columns
 [6] primary_outcomes: SummarizedExperiment with 2 rows and 941 columns
Functionality:
 experiments() - obtain the ExperimentList instance
 colData() - the primary/phenotype DataFrame
 sampleMap() - the sample coordination DataFrame
 `$`, `[`, `[[` - extract colData columns, subset, or experiment
 *Format() - convert into a long or wide DataFrame
 assays() - convert ExperimentList to a SimpleList of matrices
 exportClass() - save data to flat files

Analyses

Demographics (Table 1)

Primary outcomes (Table 2)

Colonization by week 5 : TRUE (blinded) ARMS

Code
col_by_week5 <- 
  mae[["primary_outcomes"]] |> 
  as_tibble() |> 
  dplyr::left_join(
    mae@colData |> as_tibble() |> 
      select(uid, pid, visit_code, site, location, randomized, arm) |> 
      mutate(.sample = uid),
    by = join_by(.sample)
  ) |> 
  filter(randomized, visit_code == "1500", .feature == "colonized_by") |> 
  mutate(LBP_colonization_by_week5 = outcome) 

Visualization of the primary outcome

Code
col_by_week5 |> 
  arrange(site, arm, LBP_colonization_by_week5) |>
  group_by(site, arm) |> 
  mutate(participant_number = row_number() |> factor()) |> 
  ungroup() |> 
  ggplot(aes(x = participant_number, y = site |> fct_rev(), col = site, fill = site, shape = LBP_colonization_by_week5)) +
  geom_point(size = 3) +
  facet_grid(. ~ arm, scales = "free_x", space = "free_x") +
  guides(color = "none", fill = "none") +
  scale_shape_manual("Colonized by LBP by week 5", values = c(1, 16)) +
  xlab("Participant number") + ylab("") +
  theme(
    legend.position = "top",
    axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)
  ) +
  ggtitle("DATA STILL BLINDED")

Code
summary_table <- 
  col_by_week5 |> 
  group_by(site, arm) |> 
  summarize(
    n = n(), 
    n_success = sum(LBP_colonization_by_week5),
    .groups = "drop"
  ) |> 
  mutate(
    p = n_success / n,
    CI = binom::binom.confint(n_success, n, method = "exact")
  )
Code
table_2 <- 
  summary_table |> 
  mutate(
    N_per_site_and_arm = str_c("N = ", n),
    LBP_strain_detected = str_c(n_success, " (", round(p * 100)," %)"),
    CI_text = str_c(round(CI$lower * 100), "% - ", round(CI$upper * 100), "%")
  ) |> 
  dplyr::select(
    site, arm, N_per_site_and_arm, LBP_strain_detected, CI_text
  ) |> 
  pivot_longer(-c(site, arm)) |>
  mutate(
    name = 
      case_when(
        name == "N_per_site_and_arm" ~ "N participants",
        name == "LBP_strain_detected" ~ "n (%) participants\nwith LBP strain detected",
        name == "CI_text" ~ "95% CI"
      )
  ) |> 
  pivot_wider(
    id_cols = c(site, name), 
    names_from = arm,
    values_from = value, values_fill = ""
    )
Code
table_2 |> 
  group_by(site) |> 
  gt(caption = "DATA STILL BLINDED; Table 1: Colonization by week 5 by arm and site", row_group_as_column = TRUE) |> 
  cols_width(
    "name" ~ px(200),
    everything() ~ px(120)
  ) |> 
  cols_label(
    name = "",
    Blinded = "All blinded arms",
    # Pl = "Placebo",
    # `LC-106-7` = "LC-106<br>7 days",
    # `LC-106-3` = "LC-106<br>3 days",
    # `LC-106-o` = "LC-106<br>overlap",
    # `LC-115` = "LC-115<br>7 days",
    .fn = md
  )
DATA STILL BLINDED; Table 1: Colonization by week 5 by arm and site
LC-106-o All blinded arms
CAP N participants N = 14 N = 48
n (%) participants with LBP strain detected 7 (50 %) 23 (48 %)
95% CI 23% - 77% 33% - 63%
MGH N participants N = 1 N = 18
n (%) participants with LBP strain detected 1 (100 %) 14 (78 %)
95% CI 3% - 100% 52% - 94%

Tests / models

Code
col_by_week5_agg <- 
  col_by_week5 |>
  group_by(site, arm) |>
  summarise(success = LBP_colonization_by_week5 |> sum(), ntrials = n()) 

bfit <- 
  brm(
    success | trials(ntrials) ~ arm + site + site:arm, 
    data = col_by_week5_agg, 
    family = binomial("logit")
    )
Running /Library/Frameworks/R.framework/Resources/bin/R CMD SHLIB foo.c
using C compiler: ‘Apple clang version 17.0.0 (clang-1700.0.13.3)’
using SDK: ‘’
clang -arch arm64 -I"/Library/Frameworks/R.framework/Resources/include" -DNDEBUG   -I"/Users/laurasymul/Library/R/arm64/4.4/library/Rcpp/include/"  -I"/Users/laurasymul/Library/R/arm64/4.4/library/RcppEigen/include/"  -I"/Users/laurasymul/Library/R/arm64/4.4/library/RcppEigen/include/unsupported"  -I"/Users/laurasymul/Library/R/arm64/4.4/library/BH/include" -I"/Users/laurasymul/Library/R/arm64/4.4/library/StanHeaders/include/src/"  -I"/Users/laurasymul/Library/R/arm64/4.4/library/StanHeaders/include/"  -I"/Users/laurasymul/Library/R/arm64/4.4/library/RcppParallel/include/"  -I"/Users/laurasymul/Library/R/arm64/4.4/library/rstan/include" -DEIGEN_NO_DEBUG  -DBOOST_DISABLE_ASSERTS  -DBOOST_PENDING_INTEGER_LOG2_HPP  -DSTAN_THREADS  -DUSE_STANC3 -DSTRICT_R_HEADERS  -DBOOST_PHOENIX_NO_VARIADIC_EXPRESSION  -D_HAS_AUTO_PTR_ETC=0  -include '/Users/laurasymul/Library/R/arm64/4.4/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp'  -D_REENTRANT -DRCPP_PARALLEL_USE_TBB=1   -I/opt/R/arm64/include    -fPIC  -falign-functions=64 -Wall -g -O2  -c foo.c -o foo.o
In file included from <built-in>:1:
In file included from /Users/laurasymul/Library/R/arm64/4.4/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp:22:
In file included from /Users/laurasymul/Library/R/arm64/4.4/library/RcppEigen/include/Eigen/Dense:1:
In file included from /Users/laurasymul/Library/R/arm64/4.4/library/RcppEigen/include/Eigen/Core:19:
/Users/laurasymul/Library/R/arm64/4.4/library/RcppEigen/include/Eigen/src/Core/util/Macros.h:679:10: fatal error: 'cmath' file not found
  679 | #include <cmath>
      |          ^~~~~~~
1 error generated.
make: *** [foo.o] Error 1

SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 1).
Chain 1: 
Chain 1: Gradient evaluation took 1.8e-05 seconds
Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 0.18 seconds.
Chain 1: Adjust your expectations accordingly!
Chain 1: 
Chain 1: 
Chain 1: Iteration:    1 / 2000 [  0%]  (Warmup)
Chain 1: Iteration:  200 / 2000 [ 10%]  (Warmup)
Chain 1: Iteration:  400 / 2000 [ 20%]  (Warmup)
Chain 1: Iteration:  600 / 2000 [ 30%]  (Warmup)
Chain 1: Iteration:  800 / 2000 [ 40%]  (Warmup)
Chain 1: Iteration: 1000 / 2000 [ 50%]  (Warmup)
Chain 1: Iteration: 1001 / 2000 [ 50%]  (Sampling)
Chain 1: Iteration: 1200 / 2000 [ 60%]  (Sampling)
Chain 1: Iteration: 1400 / 2000 [ 70%]  (Sampling)
Chain 1: Iteration: 1600 / 2000 [ 80%]  (Sampling)
Chain 1: Iteration: 1800 / 2000 [ 90%]  (Sampling)
Chain 1: Iteration: 2000 / 2000 [100%]  (Sampling)
Chain 1: 
Chain 1:  Elapsed Time: 0.079 seconds (Warm-up)
Chain 1:                0.126 seconds (Sampling)
Chain 1:                0.205 seconds (Total)
Chain 1: 

SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 2).
Chain 2: 
Chain 2: Gradient evaluation took 2e-06 seconds
Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 0.02 seconds.
Chain 2: Adjust your expectations accordingly!
Chain 2: 
Chain 2: 
Chain 2: Iteration:    1 / 2000 [  0%]  (Warmup)
Chain 2: Iteration:  200 / 2000 [ 10%]  (Warmup)
Chain 2: Iteration:  400 / 2000 [ 20%]  (Warmup)
Chain 2: Iteration:  600 / 2000 [ 30%]  (Warmup)
Chain 2: Iteration:  800 / 2000 [ 40%]  (Warmup)
Chain 2: Iteration: 1000 / 2000 [ 50%]  (Warmup)
Chain 2: Iteration: 1001 / 2000 [ 50%]  (Sampling)
Chain 2: Iteration: 1200 / 2000 [ 60%]  (Sampling)
Chain 2: Iteration: 1400 / 2000 [ 70%]  (Sampling)
Chain 2: Iteration: 1600 / 2000 [ 80%]  (Sampling)
Chain 2: Iteration: 1800 / 2000 [ 90%]  (Sampling)
Chain 2: Iteration: 2000 / 2000 [100%]  (Sampling)
Chain 2: 
Chain 2:  Elapsed Time: 0.072 seconds (Warm-up)
Chain 2:                0.08 seconds (Sampling)
Chain 2:                0.152 seconds (Total)
Chain 2: 

SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 3).
Chain 3: 
Chain 3: Gradient evaluation took 2e-06 seconds
Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 0.02 seconds.
Chain 3: Adjust your expectations accordingly!
Chain 3: 
Chain 3: 
Chain 3: Iteration:    1 / 2000 [  0%]  (Warmup)
Chain 3: Iteration:  200 / 2000 [ 10%]  (Warmup)
Chain 3: Iteration:  400 / 2000 [ 20%]  (Warmup)
Chain 3: Iteration:  600 / 2000 [ 30%]  (Warmup)
Chain 3: Iteration:  800 / 2000 [ 40%]  (Warmup)
Chain 3: Iteration: 1000 / 2000 [ 50%]  (Warmup)
Chain 3: Iteration: 1001 / 2000 [ 50%]  (Sampling)
Chain 3: Iteration: 1200 / 2000 [ 60%]  (Sampling)
Chain 3: Iteration: 1400 / 2000 [ 70%]  (Sampling)
Chain 3: Iteration: 1600 / 2000 [ 80%]  (Sampling)
Chain 3: Iteration: 1800 / 2000 [ 90%]  (Sampling)
Chain 3: Iteration: 2000 / 2000 [100%]  (Sampling)
Chain 3: 
Chain 3:  Elapsed Time: 0.075 seconds (Warm-up)
Chain 3:                0.099 seconds (Sampling)
Chain 3:                0.174 seconds (Total)
Chain 3: 

SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 4).
Chain 4: 
Chain 4: Gradient evaluation took 2e-06 seconds
Chain 4: 1000 transitions using 10 leapfrog steps per transition would take 0.02 seconds.
Chain 4: Adjust your expectations accordingly!
Chain 4: 
Chain 4: 
Chain 4: Iteration:    1 / 2000 [  0%]  (Warmup)
Chain 4: Iteration:  200 / 2000 [ 10%]  (Warmup)
Chain 4: Iteration:  400 / 2000 [ 20%]  (Warmup)
Chain 4: Iteration:  600 / 2000 [ 30%]  (Warmup)
Chain 4: Iteration:  800 / 2000 [ 40%]  (Warmup)
Chain 4: Iteration: 1000 / 2000 [ 50%]  (Warmup)
Chain 4: Iteration: 1001 / 2000 [ 50%]  (Sampling)
Chain 4: Iteration: 1200 / 2000 [ 60%]  (Sampling)
Chain 4: Iteration: 1400 / 2000 [ 70%]  (Sampling)
Chain 4: Iteration: 1600 / 2000 [ 80%]  (Sampling)
Chain 4: Iteration: 1800 / 2000 [ 90%]  (Sampling)
Chain 4: Iteration: 2000 / 2000 [100%]  (Sampling)
Chain 4: 
Chain 4:  Elapsed Time: 0.08 seconds (Warm-up)
Chain 4:                0.083 seconds (Sampling)
Chain 4:                0.163 seconds (Total)
Chain 4: 
Code
summary(bfit)
plot(bfit, ask = FALSE)
bind_cols(col_by_week5_agg, predict(bfit))
# loo(bfit, moment_match = TRUE, reloo = TRUE)
# pp_check(bfit)
Code
plot_res <- plot(conditional_effects(bfit), ask = FALSE) 

Code
Reduce(`+`, plot_res) +
  plot_layout(widths = c(2, 1, 3))

Code
model_results <- summary(bfit)$fixed
model_results <- 
  model_results |> 
  rownames_to_column("term") |> 
  as_tibble() |> 
  mutate(
    factor = 
      case_when(
        str_detect(term, ":site") ~ "Arm x Site",
        str_detect(term, "^site") ~ "Site",
        str_detect(term, "^arm") ~ "Arm",
        TRUE ~ "Ref.",
      ),
    factor_level = 
      term |> 
      str_remove("arm") |> str_remove("site") |> str_replace(":"," x ") |> 
      str_replace("6M", "6-") |> str_replace("CM", "C-")
  ) |> 
  mutate(
    OR = ifelse(factor == "Ref.", NA, exp(Estimate)),
    lower_CI = ifelse(factor == "Ref.", NA, exp(`l-95% CI`)),
    upper_CI = ifelse(factor == "Ref.", NA, exp(`u-95% CI`))
  ) 
Code
model_results |> 
  select(factor, factor_level, OR, lower_CI, upper_CI) |>
  gt() |> 
  gt::fmt_number(
    decimals = 2, n_sigfig = 3
  )
factor factor_level OR lower_CI upper_CI
Ref. Intercept NA NA NA
Arm Blinded 0.931 0.262 3.37
Site MGH 46,900 0.363 2,250,000,000,000,000,000
Arm x Site Blinded x MGH 0.0000884 0.00000000000000000197 13.6
Code
model_results_arm <- 
  model_results |> 
  filter(factor == "Arm") |> 
  mutate(
    OR = OR |> round(2) |> as.character(),
    CI = str_c(lower_CI |> round(2), " - ", upper_CI |> round(2))
    ) |> 
  select(factor_level, OR, CI) |>
  pivot_longer(cols = -factor_level, names_to = "name", values_to = "value") |>
  pivot_wider(names_from = factor_level, values_from = value) 
Code
table_2_with_model_results <- 
  bind_rows(
    table_2,
    model_results_arm
  ) |> 
  mutate(
    site = site |> str_replace_na(""),
    Pl = 
      case_when(
        name == "OR" ~ "Ref.",
        name == "CI" ~ "",
        TRUE ~ Pl
      )
  )
Code
table_2_with_model_results |> 
  group_by(site) |>
  gt(caption = "Table 2: Colonization by week 5 by arm and site", row_group_as_column = TRUE) |> 
  cols_width(
    "name" ~ px(200),
    everything() ~ px(120)
  ) |> 
  cols_label(
    name = "",
    Pl = "Placebo",
    `LC-106-7` = "LC-106<br>7 days",
    `LC-106-3` = "LC-106<br>3 days",
    `LC-106-o` = "LC-106<br>overlap",
    `LC-115` = "LC-115<br>7 days",
    .fn = md
  )

Colonization by week 5: RANDOM ARMS

Code
col_by_week5 <- 
  mae[["primary_outcomes"]] |> 
  as_tibble() |> 
  dplyr::left_join(
    mae@colData |> as_tibble() |> 
      select(uid, pid, visit_code, site, location, randomized, arm) |> 
      mutate(.sample = uid),
    by = join_by(.sample)
  ) |> 
  filter(randomized, visit_code == "1500", .feature == "colonized_by") |> 
  mutate(LBP_colonization_by_week5 = outcome) |> 
  mutate(
    arm = 
      case_when(
        (arm == "LC-106-o") ~ arm,
        (location == "US") ~ sample(c("Pl", "LC-106-7", "LC-115"), n(), replace = TRUE),
        (location == "SA") ~ sample(c("Pl", "LC-106-7", "LC-106-3", "LC-115"), n(), replace = TRUE),
        TRUE ~ "Blinded"
      ) |> 
      factor(levels = c("Pl", "LC-106-7", "LC-106-3", "LC-106-o", "LC-115", "Blinded"))
  ) 

Visualization of the primary outcome

Code
col_by_week5 |> 
  arrange(site, arm, LBP_colonization_by_week5) |>
  group_by(site, arm) |> 
  mutate(participant_number = row_number() |> factor()) |> 
  ungroup() |> 
  ggplot(aes(x = participant_number, y = site |> fct_rev(), col = site, fill = site, shape = LBP_colonization_by_week5)) +
  geom_point(size = 3) +
  facet_grid(. ~ arm, scales = "free_x", space = "free_x") +
  guides(color = "none", fill = "none") +
  scale_shape_manual("Colonized by LBP by week 5", values = c(1, 16)) +
  xlab("Participant number") + ylab("") +
  theme(
    legend.position = "top",
    axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)
  ) +
  ggtitle("RANDOM ARMS; DATA STILL BLINDED")

Code
summary_table <- 
  col_by_week5 |> 
  group_by(site, arm) |> 
  summarize(
    n = n(), 
    n_success = sum(LBP_colonization_by_week5),
    .groups = "drop"
  ) |> 
  mutate(
    p = n_success / n,
    CI = binom::binom.confint(n_success, n, method = "exact")
  )
Code
table_2 <- 
  summary_table |> 
  mutate(
    N_per_site_and_arm = str_c("N = ", n),
    LBP_strain_detected = str_c(n_success, " (", round(p * 100)," %)"),
    CI_text = str_c(round(CI$lower * 100), "% - ", round(CI$upper * 100), "%")
  ) |> 
  dplyr::select(
    site, arm, N_per_site_and_arm, LBP_strain_detected, CI_text
  ) |> 
  pivot_longer(-c(site, arm)) |>
  mutate(
    name = 
      case_when(
        name == "N_per_site_and_arm" ~ "N participants",
        name == "LBP_strain_detected" ~ "n (%) participants\nwith LBP strain detected",
        name == "CI_text" ~ "95% CI"
      )
  ) |> 
  pivot_wider(
    id_cols = c(site, name), 
    names_from = arm,
    values_from = value, values_fill = ""
    )
Code
table_2 |> 
  group_by(site) |> 
  gt(caption = "RANDOM ARMS, DATA STILL BLINDED; Table 1: Colonization by week 5 by arm and site", row_group_as_column = TRUE) |> 
  cols_width(
    "name" ~ px(200),
    everything() ~ px(120)
  ) |> 
  cols_label(
    name = "",
    Pl = "Placebo",
    `LC-106-7` = "LC-106<br>7 days",
    `LC-106-3` = "LC-106<br>3 days",
    `LC-106-o` = "LC-106<br>overlap",
    `LC-115` = "LC-115<br>7 days",
    .fn = md
  )
RANDOM ARMS, DATA STILL BLINDED; Table 1: Colonization by week 5 by arm and site
Placebo LC-106
7 days
LC-106
3 days
LC-106
overlap
LC-115
7 days
CAP N participants N = 13 N = 9 N = 13 N = 14 N = 13
n (%) participants with LBP strain detected 7 (54 %) 4 (44 %) 5 (38 %) 7 (50 %) 7 (54 %)
95% CI 25% - 81% 14% - 79% 14% - 68% 23% - 77% 25% - 81%
MGH N participants N = 7 N = 8 N = 1 N = 3
n (%) participants with LBP strain detected 6 (86 %) 6 (75 %) 1 (100 %) 2 (67 %)
95% CI 42% - 100% 35% - 97% 3% - 100% 9% - 99%

Tests / models

Code
col_by_week5_agg <- 
  col_by_week5 |>
  group_by(site, arm) |>
  summarise(success = LBP_colonization_by_week5 |> sum(), ntrials = n()) 

bfit <- 
  brm(
    success | trials(ntrials) ~ arm + site + site:arm, 
    data = col_by_week5_agg, 
    family = binomial("logit")
    )
Running /Library/Frameworks/R.framework/Resources/bin/R CMD SHLIB foo.c
using C compiler: ‘Apple clang version 17.0.0 (clang-1700.0.13.3)’
using SDK: ‘’
clang -arch arm64 -I"/Library/Frameworks/R.framework/Resources/include" -DNDEBUG   -I"/Users/laurasymul/Library/R/arm64/4.4/library/Rcpp/include/"  -I"/Users/laurasymul/Library/R/arm64/4.4/library/RcppEigen/include/"  -I"/Users/laurasymul/Library/R/arm64/4.4/library/RcppEigen/include/unsupported"  -I"/Users/laurasymul/Library/R/arm64/4.4/library/BH/include" -I"/Users/laurasymul/Library/R/arm64/4.4/library/StanHeaders/include/src/"  -I"/Users/laurasymul/Library/R/arm64/4.4/library/StanHeaders/include/"  -I"/Users/laurasymul/Library/R/arm64/4.4/library/RcppParallel/include/"  -I"/Users/laurasymul/Library/R/arm64/4.4/library/rstan/include" -DEIGEN_NO_DEBUG  -DBOOST_DISABLE_ASSERTS  -DBOOST_PENDING_INTEGER_LOG2_HPP  -DSTAN_THREADS  -DUSE_STANC3 -DSTRICT_R_HEADERS  -DBOOST_PHOENIX_NO_VARIADIC_EXPRESSION  -D_HAS_AUTO_PTR_ETC=0  -include '/Users/laurasymul/Library/R/arm64/4.4/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp'  -D_REENTRANT -DRCPP_PARALLEL_USE_TBB=1   -I/opt/R/arm64/include    -fPIC  -falign-functions=64 -Wall -g -O2  -c foo.c -o foo.o
In file included from <built-in>:1:
In file included from /Users/laurasymul/Library/R/arm64/4.4/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp:22:
In file included from /Users/laurasymul/Library/R/arm64/4.4/library/RcppEigen/include/Eigen/Dense:1:
In file included from /Users/laurasymul/Library/R/arm64/4.4/library/RcppEigen/include/Eigen/Core:19:
/Users/laurasymul/Library/R/arm64/4.4/library/RcppEigen/include/Eigen/src/Core/util/Macros.h:679:10: fatal error: 'cmath' file not found
  679 | #include <cmath>
      |          ^~~~~~~
1 error generated.
make: *** [foo.o] Error 1

SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 1).
Chain 1: 
Chain 1: Gradient evaluation took 2e-05 seconds
Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 0.2 seconds.
Chain 1: Adjust your expectations accordingly!
Chain 1: 
Chain 1: 
Chain 1: Iteration:    1 / 2000 [  0%]  (Warmup)
Chain 1: Iteration:  200 / 2000 [ 10%]  (Warmup)
Chain 1: Iteration:  400 / 2000 [ 20%]  (Warmup)
Chain 1: Iteration:  600 / 2000 [ 30%]  (Warmup)
Chain 1: Iteration:  800 / 2000 [ 40%]  (Warmup)
Chain 1: Iteration: 1000 / 2000 [ 50%]  (Warmup)
Chain 1: Iteration: 1001 / 2000 [ 50%]  (Sampling)
Chain 1: Iteration: 1200 / 2000 [ 60%]  (Sampling)
Chain 1: Iteration: 1400 / 2000 [ 70%]  (Sampling)
Chain 1: Iteration: 1600 / 2000 [ 80%]  (Sampling)
Chain 1: Iteration: 1800 / 2000 [ 90%]  (Sampling)
Chain 1: Iteration: 2000 / 2000 [100%]  (Sampling)
Chain 1: 
Chain 1:  Elapsed Time: 0.397 seconds (Warm-up)
Chain 1:                0.49 seconds (Sampling)
Chain 1:                0.887 seconds (Total)
Chain 1: 

SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 2).
Chain 2: 
Chain 2: Gradient evaluation took 2e-06 seconds
Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 0.02 seconds.
Chain 2: Adjust your expectations accordingly!
Chain 2: 
Chain 2: 
Chain 2: Iteration:    1 / 2000 [  0%]  (Warmup)
Chain 2: Iteration:  200 / 2000 [ 10%]  (Warmup)
Chain 2: Iteration:  400 / 2000 [ 20%]  (Warmup)
Chain 2: Iteration:  600 / 2000 [ 30%]  (Warmup)
Chain 2: Iteration:  800 / 2000 [ 40%]  (Warmup)
Chain 2: Iteration: 1000 / 2000 [ 50%]  (Warmup)
Chain 2: Iteration: 1001 / 2000 [ 50%]  (Sampling)
Chain 2: Iteration: 1200 / 2000 [ 60%]  (Sampling)
Chain 2: Iteration: 1400 / 2000 [ 70%]  (Sampling)
Chain 2: Iteration: 1600 / 2000 [ 80%]  (Sampling)
Chain 2: Iteration: 1800 / 2000 [ 90%]  (Sampling)
Chain 2: Iteration: 2000 / 2000 [100%]  (Sampling)
Chain 2: 
Chain 2:  Elapsed Time: 0.407 seconds (Warm-up)
Chain 2:                0.477 seconds (Sampling)
Chain 2:                0.884 seconds (Total)
Chain 2: 

SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 3).
Chain 3: 
Chain 3: Gradient evaluation took 2e-06 seconds
Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 0.02 seconds.
Chain 3: Adjust your expectations accordingly!
Chain 3: 
Chain 3: 
Chain 3: Iteration:    1 / 2000 [  0%]  (Warmup)
Chain 3: Iteration:  200 / 2000 [ 10%]  (Warmup)
Chain 3: Iteration:  400 / 2000 [ 20%]  (Warmup)
Chain 3: Iteration:  600 / 2000 [ 30%]  (Warmup)
Chain 3: Iteration:  800 / 2000 [ 40%]  (Warmup)
Chain 3: Iteration: 1000 / 2000 [ 50%]  (Warmup)
Chain 3: Iteration: 1001 / 2000 [ 50%]  (Sampling)
Chain 3: Iteration: 1200 / 2000 [ 60%]  (Sampling)
Chain 3: Iteration: 1400 / 2000 [ 70%]  (Sampling)
Chain 3: Iteration: 1600 / 2000 [ 80%]  (Sampling)
Chain 3: Iteration: 1800 / 2000 [ 90%]  (Sampling)
Chain 3: Iteration: 2000 / 2000 [100%]  (Sampling)
Chain 3: 
Chain 3:  Elapsed Time: 0.293 seconds (Warm-up)
Chain 3:                0.453 seconds (Sampling)
Chain 3:                0.746 seconds (Total)
Chain 3: 

SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 4).
Chain 4: 
Chain 4: Gradient evaluation took 2e-06 seconds
Chain 4: 1000 transitions using 10 leapfrog steps per transition would take 0.02 seconds.
Chain 4: Adjust your expectations accordingly!
Chain 4: 
Chain 4: 
Chain 4: Iteration:    1 / 2000 [  0%]  (Warmup)
Chain 4: Iteration:  200 / 2000 [ 10%]  (Warmup)
Chain 4: Iteration:  400 / 2000 [ 20%]  (Warmup)
Chain 4: Iteration:  600 / 2000 [ 30%]  (Warmup)
Chain 4: Iteration:  800 / 2000 [ 40%]  (Warmup)
Chain 4: Iteration: 1000 / 2000 [ 50%]  (Warmup)
Chain 4: Iteration: 1001 / 2000 [ 50%]  (Sampling)
Chain 4: Iteration: 1200 / 2000 [ 60%]  (Sampling)
Chain 4: Iteration: 1400 / 2000 [ 70%]  (Sampling)
Chain 4: Iteration: 1600 / 2000 [ 80%]  (Sampling)
Chain 4: Iteration: 1800 / 2000 [ 90%]  (Sampling)
Chain 4: Iteration: 2000 / 2000 [100%]  (Sampling)
Chain 4: 
Chain 4:  Elapsed Time: 0.412 seconds (Warm-up)
Chain 4:                0.467 seconds (Sampling)
Chain 4:                0.879 seconds (Total)
Chain 4: 
Code
summary(bfit)
plot(bfit, ask = FALSE)
bind_cols(col_by_week5_agg, predict(bfit))
# loo(bfit, moment_match = TRUE, reloo = TRUE)
# pp_check(bfit)
Code
plot_res <- plot(conditional_effects(bfit), ask = FALSE) 

Code
Reduce(`+`, plot_res) +
  plot_layout(widths = c(2, 1, 3))

Code
model_results <- summary(bfit)$fixed
model_results <- 
  model_results |> 
  rownames_to_column("term") |> 
  as_tibble() |> 
  mutate(
    factor = 
      case_when(
        str_detect(term, ":site") ~ "Arm x Site",
        str_detect(term, "^site") ~ "Site",
        str_detect(term, "^arm") ~ "Arm",
        TRUE ~ "Ref.",
      ),
    factor_level = 
      term |> 
      str_remove("arm") |> str_remove("site") |> str_replace(":"," x ") |> 
      str_replace("6M", "6-") |> str_replace("CM", "C-")
  ) |> 
  mutate(
    OR = ifelse(factor == "Ref.", NA, exp(Estimate)),
    lower_CI = ifelse(factor == "Ref.", NA, exp(`l-95% CI`)),
    upper_CI = ifelse(factor == "Ref.", NA, exp(`u-95% CI`))
  ) 
Code
model_results |> 
  select(factor, factor_level, OR, lower_CI, upper_CI) |>
  gt() |> 
  gt::fmt_number(
    decimals = 2, n_sigfig = 3
  )
factor factor_level OR lower_CI upper_CI
Ref. Intercept NA NA NA
Arm LC-106-7 0.646 0.109 3.75
Arm LC-106-3 0.503 0.0971 2.40
Arm LC-106-o 0.818 0.160 3.82
Arm LC-115 0.972 0.194 4.89
Site MGH 7.67 0.710 191
Arm x Site LC-106-7 x MGH 0.587 0.0138 18.0
Arm x Site LC-106-3 x MGH Inf 0 Inf
Arm x Site LC-106-o x MGH 12,500,000,000 0.0873 10,900,000,000,000,000,406,931,255,578,549,704,681,695,739,904
Arm x Site LC-115 x MGH 0.285 0.00408 17.1
Code
model_results_arm <- 
  model_results |> 
  filter(factor == "Arm") |> 
  mutate(
    OR = OR |> round(2) |> as.character(),
    CI = str_c(lower_CI |> round(2), " - ", upper_CI |> round(2))
    ) |> 
  select(factor_level, OR, CI) |>
  pivot_longer(cols = -factor_level, names_to = "name", values_to = "value") |>
  pivot_wider(names_from = factor_level, values_from = value) |> 
  mutate(
    name = 
      case_when(
        name == "OR" ~ "OR",
        name == "CI" ~ "95% CI",
        TRUE ~ name
      )
  )
Code
table_2_with_model_results <- 
  bind_rows(
    table_2,
    model_results_arm
  ) |> 
  mutate(
    site = site |> str_replace_na(""),
    Pl = 
      case_when(
        name == "OR" ~ "Ref.",
        str_detect(name, "CI") ~ "",
        TRUE ~ Pl
      )
  )
Code
table_2_with_model_results |> 
  group_by(site) |>
  gt(caption = "Table 2: Colonization by week 5 by arm and site", row_group_as_column = TRUE) |> 
  cols_width(
    "name" ~ px(150),
    everything() ~ px(120)
  ) |> 
  cols_label(
    name = "",
    Pl = "Placebo",
    `LC-106-7` = "LC-106<br>7 days",
    `LC-106-3` = "LC-106<br>3 days",
    `LC-106-o` = "LC-106<br>overlap",
    `LC-115` = "LC-115<br>7 days",
    .fn = md
  )
Table 2: Colonization by week 5 by arm and site
Placebo LC-106
7 days
LC-106
3 days
LC-106
overlap
LC-115
7 days
CAP N participants N = 13 N = 9 N = 13 N = 14 N = 13
n (%) participants with LBP strain detected 7 (54 %) 4 (44 %) 5 (38 %) 7 (50 %) 7 (54 %)
95% CI 14% - 79% 14% - 68% 23% - 77% 25% - 81%
MGH N participants N = 7 N = 8 N = 1 N = 3
n (%) participants with LBP strain detected 6 (86 %) 6 (75 %) 1 (100 %) 2 (67 %)
95% CI 35% - 97% 3% - 100% 9% - 99%
OR Ref. 0.65 0.5 0.82 0.97
95% CI 0.11 - 3.75 0.1 - 2.4 0.16 - 3.82 0.19 - 4.89

Secondary outcomes (Figures 2-4)

Kinetics of colonization

Total proportions of LBP isolates

Code
total_rel_ab_of_LBP <- 
  mae[["mg"]] |> 
  as_tibble() |> 
  filter(!is.na(LBP)) |> 
  group_by(.sample, uid) |>
  summarize(
    total_LBP_rel_ab = sum(rel_abs_bact, na.rm = TRUE),
    .groups = "drop"
  )

total_rel_ab_of_LBP <- 
  total_rel_ab_of_LBP |> 
  dplyr::left_join(mae |> colData() |> as_tibble(), by = join_by(uid))
Code
g_tot_prop_LBP_strains <- 
  total_rel_ab_of_LBP |>
  filter(visit_type == "Clinic", !is.na(arm)) |> 
  ggplot(aes(x = study_week, y = total_LBP_rel_ab, col = arm)) +
  geom_line(aes(group = pid), alpha = 0.5) +
  geom_point(size = 0.5, alpha = 0.5) +
  facet_grid(site ~ arm) + 
  scale_x_continuous("Study weeks", breaks = c(0:5,7,9,12), minor_breaks = 0:12) +
  scale_y_continuous(
    "Total proportion of LBP strains", labels = scales::percent,
    breaks = seq(0, 1, by = 0.2)
  ) +
  expand_limits(y = 0) +
  scale_color_manual(values = get_arm_colors()) +
  guides(col = "none") +
  ggtitle(make_title(mae)) 
Code
g_tot_prop_LBP_strains

Proportions of participants who colonized

Code
g_prop_colonized_by <- 
  total_rel_ab_of_LBP |> 
  filter(!is.na(arm)) |> 
  group_by(site, arm, study_week) |>
  summarize(
    prop_who_colonized_at = mean(total_LBP_rel_ab > 0.05), 
    CI_low = 
      prop.test(
        x = sum(total_LBP_rel_ab > 0.05), n = n(), 
        conf.level = 0.95)$conf.int[1],
    CI_high = 
      prop.test(
        x = sum(total_LBP_rel_ab > 0.05), n = n(), 
        conf.level = 0.95)$conf.int[2],
    .groups = "drop"
  ) |>
  ggplot(aes(x = study_week, y = prop_who_colonized_at, col = arm)) +
  geom_ribbon(
    aes(ymin = CI_low, ymax = CI_high, fill = arm), 
    alpha = 0.2, color = NA
  ) +
  geom_line() +
  geom_point() +
  facet_grid(site ~ .) + 
  scale_x_continuous("Study weeks", breaks = c(0:5,7,9,12), minor_breaks = 0:12) +
  scale_y_continuous(
    "Proportion of participants\nwith detected LBP strains", labels = scales::percent,
    breaks = seq(0, 1, by = 0.2)
  ) +
  expand_limits(y = 0) +
  scale_color_manual("Arm", values = get_arm_colors()) +
  scale_fill_manual("Arm", values = get_arm_colors()) +
  ggtitle(make_title(mae)) 

g_prop_colonized_by

Figure 2

Code
 g_prop_colonized_by + 
  g_tot_prop_LBP_strains +
  plot_layout(guides = "collect", widths = c(1, 2)) +
  plot_annotation(tag_level = "a") &
  theme(legend.position = "bottom")

Microbiota trajectories

Code
rel_abs <- 
  mae[["mg"]] |> 
  as_tibble() |> 
  select(
    .feature, .sample, uid, rel_abs_bact, 
    taxon_label, genus, LBP, strain_origin
    ) 
Code
rel_abs <- 
  rel_abs |> 
  group_by(.feature) |> 
  mutate(max_rel_ab = max(rel_abs_bact, na.rm = TRUE)) |> 
  ungroup() |> 
  arrange(is.na(LBP), -max_rel_ab) |> 
  mutate(.feature = .feature |> fct_inorder()) |> 
  mutate(
    taxon = 
      case_when(
        as.numeric(.feature) <= 29 ~ taxon_label,
        TRUE ~ "Other"
      )
  ) |> 
  arrange(is.na(LBP), LBP, genus != "Lactobacillus", -max_rel_ab) |> 
  mutate(taxon = taxon |> fct_inorder() |> fct_relevel("Other", after = Inf))
Code
rel_abs_agg <- 
  rel_abs |> 
  group_by(uid, taxon, LBP, strain_origin) |> 
  summarize(rel_ab = sum(rel_abs_bact), .groups = "drop") |> 
  dplyr::left_join(mae |> colData() |> as_tibble(), by = join_by(uid)) 
Code
g_trajectories <-
  rel_abs_agg |>
  mutate(study_week = case_when(visit_code == "0000" ~ -1, TRUE ~ study_week)) |>
  filter(randomized != 0, visit_type == "Clinic", !is.na(study_week)) |>
  group_by(pid) |> 
  mutate(
    tot_LC115 = sum(rel_ab[LBP == "LC-115"]),
    tot_LBP = sum(rel_ab[!is.na(LBP)])
    ) |> 
  ungroup() |> 
  arrange(-tot_LC115, -tot_LBP) |> 
  mutate(pid = pid |> fct_inorder()) |> 
  ggplot(aes(y = rel_ab, x = pid, fill = taxon)) +
  geom_col() +
  facet_grid(study_week ~ site + arm, scales = "free", space = "free") +
  # theme(panel.spacing.x = unit(0, "points")) +
  scale_fill_manual(
    values = get_taxa_colors(rel_abs_agg$taxon |> levels()),
    guide = guide_legend(ncol = 1)
    ) +
  scale_y_continuous(
    "relative abundance", labels = scales::percent,
    breaks = seq(0, 0.5, by = 0.5), minor_breaks = seq(0, 1, by = 0.25)
  ) +
  scale_x_discrete("Participants") +
  theme(
    strip.text.y = element_text(angle = 0, hjust = 0),
    axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)
  )
Code
g_trajectories

Individual LBP strains by sites

Code
LBP_strains <- 
  mae[["mg"]] |> 
  as_tibble() |> 
  filter(!is.na(LBP)) |> 
  select(.feature, .sample, uid, rel_abs_bact, LBP, strain_origin) |> 
  left_join(mae |> colData() |> as_tibble())
Code
LBP_strains |> 
  filter(!is.na(arm), !is.na(study_week), study_week >= 2) |> 
  ggplot() +
  aes(x = .feature, y = rel_abs_bact, col = site) +
  geom_point(alpha = 0.5, size = 0.5) +
  # geom_boxplot(outlier.size = 0.5) +
  facet_grid(study_week ~ LBP + strain_origin , scales = "free_x", space = "free_x") +
  xlab("LBP strain") +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)
  ) 

Code
LBP_strains |> 
  filter(!is.na(arm), !is.na(study_week), study_week >= 2) |> 
  ggplot() +
  aes(x = .feature, y = rel_abs_bact, col = site) +
  # geom_point(alpha = 0.5, size = 0.5) +
  geom_boxplot(outlier.size = 0.5) +
  facet_grid(study_week ~ LBP + strain_origin , scales = "free_x", space = "free_x") +
  xlab("LBP strain") +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)
  ) 

Code
LBP_strains |> 
  filter(!is.na(arm), !is.na(study_week), study_week >= 2) |> 
  ggplot() +
  aes(x = study_week |> factor(), y = rel_abs_bact, col = strain_origin) +
  geom_point(alpha = 0.5, size = 0.5) +
  geom_line(aes(group = pid), alpha = 0.5) +
  facet_grid(site ~ LBP + strain_origin + .feature , scales = "free_x", space = "free_x") +
  xlab("Study week") +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)
  )